In [1]:
import pandas as pd 
In [2]:
df_monthly =  pd.read_excel('Table_4.3_Natural_Gas_Consumption_by_Sector.xlsx')
df_monthly.head()
Out[2]:
Month NG-Residentional-sector NG-Commercial-sector NG-Industrial-sector NG-Industrial-sector_02 NG-Industrial-sector_01 NG-Industrial-sector_total Natural Gas Consumed by the Industrial Sector, Total NG-Transportation-sector NG-Transportation-sector_01 NG-Transportation-sector_total NG-Electric-Power-sector NG-consumption-total
0 1973-01-01 843.900 392.315 0.0 Not Available 0.0 0.0 810.141 77.544 Not Available 77.544 224.100 2348.0
1 1973-02-01 747.331 394.281 0.0 Not Available 0.0 0.0 698.671 70.212 Not Available 70.212 215.505 2126.0
2 1973-03-01 648.504 310.799 0.0 Not Available 0.0 0.0 731.583 66.546 Not Available 66.546 257.568 2015.0
3 1973-04-01 465.867 231.943 0.0 Not Available 0.0 0.0 807.360 60.602 Not Available 60.602 269.228 1835.0
4 1973-05-01 326.313 174.258 0.0 Not Available 0.0 0.0 852.091 57.101 Not Available 57.101 319.237 1729.0
In [3]:
df_monthly.describe()
Out[3]:
Month NG-Residentional-sector NG-Commercial-sector NG-Industrial-sector NG-Industrial-sector_01 NG-Industrial-sector_total Natural Gas Consumed by the Industrial Sector, Total NG-Transportation-sector NG-Transportation-sector_total NG-Electric-Power-sector NG-consumption-total
count 602 602.000000 602.000000 602.000000 602.000000 602.000000 602.000000 602.000000 602.000000 602.000000 602.000000
mean 1998-01-15 09:36:28.704318976 396.200855 244.061022 91.198199 453.591919 521.442824 715.951678 56.165193 57.505400 465.441158 1879.160161
min 1973-01-01 00:00:00 99.781000 88.740000 0.000000 0.000000 0.000000 376.284000 33.378000 33.378000 137.449000 939.930000
25% 1985-07-08 18:00:00 141.706500 135.193500 86.991250 450.264750 507.036750 647.969750 44.107000 44.921500 260.210000 1523.534000
50% 1998-01-16 12:00:00 314.465500 202.248000 95.589000 510.493500 592.421000 717.020000 50.929000 52.280000 369.297000 1809.537000
75% 2010-07-24 06:00:00 645.060500 353.924000 108.682250 565.752000 659.294250 786.870500 65.579000 66.447000 636.384750 2198.633500
max 2023-02-01 00:00:00 1037.197000 571.744000 173.404000 769.000000 816.696000 1003.192000 132.302000 136.817000 1400.013000 3591.691000
std NaN 267.547294 124.109428 42.335883 193.356394 224.952325 105.157712 16.741735 17.640045 260.427382 494.798258
In [4]:
list(df_monthly)
Out[4]:
['Month',
 'NG-Residentional-sector',
 'NG-Commercial-sector',
 'NG-Industrial-sector',
 'NG-Industrial-sector_02',
 'NG-Industrial-sector_01',
 'NG-Industrial-sector_total',
 'Natural Gas Consumed by the Industrial Sector, Total',
 'NG-Transportation-sector',
 'NG-Transportation-sector_01',
 'NG-Transportation-sector_total',
 'NG-Electric-Power-sector',
 'NG-consumption-total']
In [5]:
df_monthly.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 602 entries, 0 to 601
Data columns (total 13 columns):
 #   Column                                                Non-Null Count  Dtype         
---  ------                                                --------------  -----         
 0   Month                                                 602 non-null    datetime64[ns]
 1   NG-Residentional-sector                               602 non-null    float64       
 2   NG-Commercial-sector                                  602 non-null    float64       
 3   NG-Industrial-sector                                  602 non-null    float64       
 4   NG-Industrial-sector_02                               602 non-null    object        
 5   NG-Industrial-sector_01                               602 non-null    float64       
 6   NG-Industrial-sector_total                            602 non-null    float64       
 7   Natural Gas Consumed by the Industrial Sector, Total  602 non-null    float64       
 8   NG-Transportation-sector                              602 non-null    float64       
 9   NG-Transportation-sector_01                           602 non-null    object        
 10  NG-Transportation-sector_total                        602 non-null    float64       
 11  NG-Electric-Power-sector                              602 non-null    float64       
 12  NG-consumption-total                                  602 non-null    float64       
dtypes: datetime64[ns](1), float64(10), object(2)
memory usage: 61.3+ KB
In [6]:
df_monthly.isna().sum()
Out[6]:
Month                                                   0
NG-Residentional-sector                                 0
NG-Commercial-sector                                    0
NG-Industrial-sector                                    0
NG-Industrial-sector_02                                 0
NG-Industrial-sector_01                                 0
NG-Industrial-sector_total                              0
Natural Gas Consumed by the Industrial Sector, Total    0
NG-Transportation-sector                                0
NG-Transportation-sector_01                             0
NG-Transportation-sector_total                          0
NG-Electric-Power-sector                                0
NG-consumption-total                                    0
dtype: int64
In [7]:
import plotly.express as px

def plot_time_series(df, title, save_file):
    fig = px.line(df, x='Month')
    
    for column in df.columns[1:]:
        fig.add_scatter(x=df['Month'], y=df[column], name=column, mode='lines+markers')
    
    fig.update_layout(title=title)
    fig.write_html(save_file)
    fig.show()
    
    # Assuming your dataframe is named 'df'
title = "Natural Gas Consumption by Each Sector"
save_file = "NaturalGasConsumption.html"
plot_time_series(df_monthly, title, save_file)
In [8]:
df_monthly
Out[8]:
Month NG-Residentional-sector NG-Commercial-sector NG-Industrial-sector NG-Industrial-sector_02 NG-Industrial-sector_01 NG-Industrial-sector_total Natural Gas Consumed by the Industrial Sector, Total NG-Transportation-sector NG-Transportation-sector_01 NG-Transportation-sector_total NG-Electric-Power-sector NG-consumption-total
0 1973-01-01 843.900 392.315 0.000 Not Available 0.000 0.000 810.141 77.544 Not Available 77.544 224.100 2348.000
1 1973-02-01 747.331 394.281 0.000 Not Available 0.000 0.000 698.671 70.212 Not Available 70.212 215.505 2126.000
2 1973-03-01 648.504 310.799 0.000 Not Available 0.000 0.000 731.583 66.546 Not Available 66.546 257.568 2015.000
3 1973-04-01 465.867 231.943 0.000 Not Available 0.000 0.000 807.360 60.602 Not Available 60.602 269.228 1835.000
4 1973-05-01 326.313 174.258 0.000 Not Available 0.000 0.000 852.091 57.101 Not Available 57.101 319.237 1729.000
... ... ... ... ... ... ... ... ... ... ... ... ... ...
597 2022-10-01 242.225 223.715 172.865 110.718 575.114 685.832 858.697 87.152 4.514 91.666 949.669 2365.973
598 2022-11-01 516.416 356.137 167.934 114.577 608.257 722.834 890.768 102.143 4.368 106.512 903.112 2772.945
599 2022-12-01 839.755 495.846 169.487 117.972 636.396 754.368 923.855 124.574 4.514 129.088 993.345 3381.890
600 2023-01-01 800.256 475.998 173.404 122.121 644.956 767.077 940.481 121.970 4.514 126.484 967.981 3311.199
601 2023-02-01 688.462 426.106 157.124 108.897 593.850 702.747 859.871 109.013 4.077 113.090 871.925 2959.454

602 rows × 13 columns

In [9]:
from pmdarima import auto_arima
import plotly.graph_objects as go

for sector_name in list(df_monthly):
    if sector_name != 'Month':
        try:
            fig = go.Figure()

            sector_df = {
                'Monthly-Time': df_monthly['Month'],
                sector_name: df_monthly[sector_name]
            }
            sector_df = pd.DataFrame.from_dict(sector_df)

            df_filter_index = sector_df.set_index('Monthly-Time')

            # Prepare the data for modeling
            years = df_filter_index.index
            energy_consumption = df_filter_index.values.flatten()

            # Split the data into training and testing
            horizan = -20
            train_data = energy_consumption[:horizan]
            test_data = energy_consumption[horizan:]

            print('Length of Train Data: {}\nLength of Test Data: {}'.format(len(train_data), len(test_data)))

            # Fit the auto ARIMA model
            model = auto_arima(train_data, seasonal=True)
            model.fit(train_data)

            # Generate predictions
            predictions = model.predict(n_periods=len(test_data))
            predictions_ahead_in_future = model.predict(n_periods=80)

            # Plot the training data
            fig.add_trace(go.Scatter(
                x=years[:horizan], y=train_data, mode='lines+markers', name='Training Data'))

            # Plot the testing data and predictions
            fig.add_trace(go.Scatter(
                x=years[horizan:], y=test_data, mode='lines+markers', name='Testing Data'))
            fig.add_trace(go.Scatter(
                x=years[horizan:], y=predictions, mode='lines+markers', name='Predictions'))

            # Plot the predictions for the future
            fig.add_trace(go.Scatter(
                x=pd.date_range(start=years[horizan], periods=80, freq='MS'),
                y=predictions_ahead_in_future, mode='lines+markers', name='80 months Future Predictions'))

            # Update the layout
            fig.update_layout(title=f'Natural Gas Energy Consumption Forecast Country: Sector {sector_name}',
                              xaxis_title='Year', yaxis_title='Energy Consumption')

            # Show the plot
            fig.show()

        except:
            print('Error Occurred in the Given Sector:', sector_name)
Length of Train Data: 582
Length of Test Data: 20
Length of Train Data: 582
Length of Test Data: 20
Length of Train Data: 582
Length of Test Data: 20
Length of Train Data: 582
Length of Test Data: 20
Error Occurred in the Given Sector: NG-Industrial-sector_02
Length of Train Data: 582
Length of Test Data: 20
Length of Train Data: 582
Length of Test Data: 20
Length of Train Data: 582
Length of Test Data: 20
Length of Train Data: 582
Length of Test Data: 20
Length of Train Data: 582
Length of Test Data: 20
Error Occurred in the Given Sector: NG-Transportation-sector_01
Length of Train Data: 582
Length of Test Data: 20
Length of Train Data: 582
Length of Test Data: 20
Length of Train Data: 582
Length of Test Data: 20
In [10]:
# import pandas as pd
# import plotly.graph_objects as go
# from statsmodels.tsa.arima.model import ARIMA
# from dateutil.relativedelta import relativedelta
# import numpy as np
# from pmdarima import auto_arima


# # Iterate over each country and sector
# for country in df['County'].unique():
#     for sector in df['Sector'].unique():
#         fig = go.Figure()

#         # Get the energy consumption data for the current country and sector
#         df_filter = df[(df['County'] == country) & (
#             df['Sector'] == sector)][['Timestamp', 'Yearly Data']]
#         df_filter_index = df_filter.set_index('Timestamp')

#         # Prepare the data for modeling
#         years = df_filter_index.index
#         energy_consumption = df_filter_index.values.flatten()

#         # Split the data into training and testing
#         # Use all data except the last 5 years for training
#         train_data = energy_consumption[:-5]
#         test_data = energy_consumption[-5:]  # Use the last 5 years for testing

#         # Fit the auto ARIMA model
#         model = auto_arima(train_data, seasonal=False)
#         model.fit(train_data)

#         # Generate predictions
#         predictions = model.predict(n_periods=len(test_data))

#         # Plot the training data
#         fig.add_trace(go.Scatter(
#             x=years[:-5], y=train_data, mode='lines+markers', name='Training Data'))

#         # Plot the predictions
#         fig.add_trace(go.Scatter(
#             x=years[-5:], y=test_data, mode='lines+markers', name='Testing Data'))
#         fig.add_trace(go.Scatter(
#             x=years[-5:], y=predictions, mode='lines+markers', name='Predictions'))

#         # Update the layout
#         fig.update_layout(title=f'Energy Consumption Forecast Country : {country} : Sector {sector} ',
#                           xaxis_title='Year', yaxis_title='Energy Consumption')

#         # Show the plot
#         fig.show()